{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bf0068d-9f6f-4c61-bc08-47b9594254d2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!git clone https://github.com/deepseek-ai/DeepSeek-Coder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14a9e9c5-07b0-4d82-9dde-06559bb14ea2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install deepspeed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# if you want to clone without large files – just their pointers\n",
    "!apt install git-lfs"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8fd2eb721d14953a"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8817387-03de-4fb7-9d6f-63d41e1db328",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure you have git-lfs installed (https://git-lfs.com)\n",
    "!git lfs install\n",
    "!git clone https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "!cd DeepSeek-Coder/finetune && pip install -r requirements.txt"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8ee1f87d90fc2a8e"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f42cbcf2-09a1-4560-9a61-33a16ad220ea",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "DATA_PATH=\"/openbayes/home/summary.jsonl\"\n",
    "OUTPUT_PATH=\"/openbayes/home/output\"\n",
    "\n",
    "# NotImplementedError: Using RTX 3090 or 4000 series doesn't support faster communication broadband via P2P or IB. Please\n",
    "# set `NCCL_P2P_DISABLE=\"1\"` and `NCCL_IB_DISABLE=\"1\" or use `accelerate launch` which will do this automatically.\n",
    "# \n",
    "# !NCCL_P2P_DISABLE=1\n",
    "# !NCCL_IB_DISABLE=1\n",
    "\n",
    "MODEL_PATH=\"/openbayes/home/deepseek-coder-6.7b-instruct\"\n",
    "\n",
    "!cd DeepSeek-Coder/finetune && deepspeed finetune_deepseekcoder.py \\\n",
    "    --model_name_or_path $MODEL_PATH \\\n",
    "    --data_path $DATA_PATH \\\n",
    "    --output_dir $OUTPUT_PATH \\\n",
    "    --num_train_epochs 1 \\\n",
    "    --model_max_length 768 \\\n",
    "    --per_device_train_batch_size 16 \\\n",
    "    --per_device_eval_batch_size 1 \\\n",
    "    --gradient_accumulation_steps 4 \\\n",
    "    --evaluation_strategy \"no\" \\\n",
    "    --save_strategy \"no\" \\\n",
    "    --save_steps 50 \\\n",
    "    --save_total_limit 10 \\\n",
    "    --learning_rate 4e-5 \\\n",
    "    --warmup_steps 10 \\\n",
    "    --logging_steps 1 \\\n",
    "    --lr_scheduler_type \"cosine\" \\\n",
    "    --gradient_checkpointing True \\\n",
    "    --report_to \"tensorboard\" \\\n",
    "    --deepspeed configs/ds_config_zero3.json \\\n",
    "    --bf16 True"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
